
R version 3.2.5 (2016-04-14) -- "Very, Very Secure Dishes"
Copyright (C) 2016 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

Microsoft R Open 3.2.5
Default CRAN mirror snapshot taken on 2016-05-01
The enhanced R distribution from Microsoft
Visit https://mran.microsoft.com/ for information
about additional features.

Multithreaded BLAS/LAPACK libraries detected. Using 12 cores for math algorithms.
> 
> rm(list=ls())
> set.seed(21301)
> setwd("D:/Dropbox/jesarey_documents/Measuring Publication Bias/pub-bias-replication-files")
> 
> library(foreign)
> library(ggplot2)
> 
> my.data.set<-read.dta(file="publication_data_2016-5-29.dta")
> data.full<-my.data.set[which(my.data.set$number_of_dv==1),]
> 
> # count articles before pruning
> dim(data.full) # how big data set?
[1] 314  20
> sum(data.full$journal=="APSR")
[1] 139
> sum(data.full$journal=="AJPS")
[1] 175
> 
> # continuous dependent variables only, first DV
> data<-my.data.set[ which(my.data.set$model==1 & my.data.set$number_of_dv==1),]
> 
> # removing observations with missing values
> est.miss<-is.na(data$estimate)
> se.miss<-is.na(data$se)
> size.miss<-is.na(data$sample_size)
> data$miss<-est.miss+se.miss+size.miss
> # if any num_ivs are missing, set them to 2 (the smallest reasonable choice)
> data$num_ivs[which(is.na(data$num_ivs))] <- 2
> 
> dat<-subset(data, subset=data$miss<1, select=c("journal", "title", "estimate", "se", "sample_size", "tails", "num_ivs", "doi"))
> dim(dat) # how big data set?
[1] 167   8
> sum(dat$journal=="APSR")
[1] 70
> sum(dat$journal=="AJPS")
[1] 97
> 
> 
> sig.cut<- abs(qt(0.025, df= (dat$sample_size - dat$num_ivs - 1)))
> dat$notsig<-ifelse(abs(dat$estimate/dat$se)<sig.cut,1,0)
> 
> sum(dat$notsig) # how many non-significant results published? = 25
[1] 25
> sum(dat$notsig)/length(dat$notsig) # what % of non-significant results published? = 0.1497006
[1] 0.1497006
> 
> 
> # exclude the non-significant results and observations
> dat<-subset(dat, subset=(dat$notsig==0))
> t.dat<-dat$estimate/dat$se
> n.dat<-dat$sample_size
> 
> dim(dat) # how big data set?
[1] 142   9
> 
> attach(dat)
> 
> # a sub-function to generate a large set of draws from the prior
> # for beta, simulate sampling variation using the estimated se for
> # beta, and then calculate publication bias
> 
> # for spike-and-slab distributed beta
> 
> gen.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed, alpha){
+   
+   if(seed!="off"){
+     set.seed(seed)
+   }
+   b.true <- runif(n.draws, min=(-1)*bound, max=bound)
+   b.false <- rep(0, n.draws)
+   choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
+   b.test <- ifelse(choice==1, b.true, b.false)
+   
+   b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
+   
+   sig <- ifelse( abs(b.test.est/se.b) >= abs(qt((alpha/2), df=df.b)), 1, 0 )
+   
+   list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
+   
+   return(list.out)
+   
+ }
> 
> 
> 
> bias.calc <- function(b, se.b, df.b, pr.false, bound = 3 * abs(b), n.draws = 100000, seed=123456, alpha=0.05 ){
+   
+   list.store <- gen.samp(se.b, df.b, pr.false, n.draws, bound, seed, alpha)
+   sig <- which(list.store$b.sig==1)
+   bias <- ( list.store$b.est[sig] - list.store$b.true[sig] )
+   mult <- sign(list.store$b.true[sig])
+   mult <- ifelse(mult==0, sign(bias), mult)
+   #return( mean(mult*bias)/b * sign(b) )
+   return( mean(mult*bias)/abs(b) )
+ }
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
+   
+ }
> mean(b.bias)                                   # 0.298437             
[1] 0.298437
> length(which(b.bias>=0.1))/length(b.bias)      # 0.8732394
[1] 0.8732394
> 
> dat$b.bias.plot <- 100*(b.bias)
> 
> # make category of p-value
> dat$t <- abs( dat$estimate / dat$se )
> dat$p <- 2*(1 - pt(q=dat$t, df=(dat$sample_size-dat$num_ivs-1)))
> dat$p.cat <- cut_interval(dat$p, length=0.01)
> dat$bias.cat <- cut_interval(dat$b.bias.plot, length=10)
> 
> # some helpful code was taken from
> # http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
> # and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
> # and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
> p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 29.8%", y = 0.265, x=3.5, size = 3) + ggtitle("baseline 10% chance of non-zero relationship")
> 
> p
> ggsave(filename="bias-hist.pdf", plot=p, device="pdf", width=6, height=5)
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
+   
+ }
> mean(b.bias)                                    # 0.188862
[1] 0.188862
> length(which(b.bias>=0.1))/length(b.bias)       # 0.7816901
[1] 0.7816901
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
+   
+ }
> mean(b.bias)                                     # 0.08906531
[1] 0.08906531
> length(which(b.bias>=0.1))/length(b.bias)        # 0.415493
[1] 0.415493
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
+   
+ }
> mean(b.bias)                                     # 0.04733124
[1] 0.04733124
> length(which(b.bias>=0.1))/length(b.bias)        # 0.1408451
[1] 0.1408451
> 
> 
> dat$b.bias.plot <- 100*(b.bias)
> 
> # make category of p-value
> dat$t <- abs( dat$estimate / dat$se )
> dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))
> dat$p.cat <- cut_interval(dat$p, length=0.01)
> dat$bias.cat <- cut_interval(dat$b.bias.plot, length=2)
> 
> # some helpful code was taken from
> # http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
> # and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
> # and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
> p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 4.73%", y = 0.265, x=4, size = 3) + ggtitle("baseline 100% chance of non-zero relationship")
> 
> p
> ggsave(filename="bias-hist-nospike.pdf", plot=p, device="pdf", width=6, height=5)
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, bound = 10*abs(dat$estimate[i]))
+   
+ }
> mean(b.bias)                                                    # 0.2438803
[1] 0.2438803
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0.5070423
[1] 0.5070423
> 
> 
> 
> 
> 
> # what about a lower threshold for significance?
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, alpha=0.01)
+   
+ }
> mean(b.bias)                                   # 0.1556354             
[1] 0.1556354
> length(which(b.bias>=0.1))/length(b.bias)      # 0.6690141
[1] 0.6690141
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0, alpha=0.01)
+   
+ }
> mean(b.bias)                                     # 0.0559474
[1] 0.0559474
> length(which(b.bias>=0.1))/length(b.bias)        # 0.2042254
[1] 0.2042254
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> # now repeat the analysis above for a normal prior
> 
> gen.norm.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed, alpha){
+   
+   if(seed!="off"){
+     set.seed(seed)
+   }
+   b.true <- rnorm(n.draws, mean=0, sd=bound)
+   b.false <- rep(0, n.draws)
+   choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
+   b.test <- ifelse(choice==1, b.true, b.false)
+   
+   b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
+   
+   sig <- ifelse( abs(b.test.est/se.b) >= abs(qt((alpha/2), df=df.b)), 1, 0 )
+   list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
+   return(list.out)
+   
+ }
> 
> 
> 
> bias.calc.norm <- function(b, se.b, df.b, pr.false, bound = abs(b), n.draws = 100000, seed=123456, alpha=0.05 ){
+   
+   list.store <- gen.norm.samp(se.b, df.b, pr.false, n.draws, bound, seed, alpha)
+   sig <- which(list.store$b.sig==1)
+   bias <- ( list.store$b.est[sig] - list.store$b.true[sig] )
+   mult <- sign(list.store$b.true[sig])
+   mult <- ifelse(mult==0, sign(bias), mult)
+   return( mean(mult*bias)/b * sign(b) )
+   
+ }
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
+   
+ }
> mean(b.bias)                                                         # 0.4046605
[1] 0.4046605
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.528169
[1] 0.528169
> 
> dat$b.bias.plot <- 100*(b.bias)
> 
> # make category of p-value
> dat$t <- abs( dat$estimate / dat$se )
> dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))
> dat$p.cat <- cut_interval(dat$p, length=0.01)
> dat$bias.cat <- cut_interval(dat$b.bias.plot, length=15)
> 
> # some helpful code was taken from
> # http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
> # and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
> # and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
> p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 40.5%", y = 0.315, x=4, size = 3) + ggtitle("baseline 10% chance of non-zero relationship")
> 
> p
> ggsave(filename="bias-hist-norm.pdf", plot=p, device="pdf", width=6, height=5)
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
+   
+ }
> mean(b.bias)                                                         # 0.2924327
[1] 0.2924327
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.4788732
[1] 0.4788732
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
+   
+ }
> mean(b.bias)                                                         # 0.177636
[1] 0.177636
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.443662
[1] 0.443662
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
+   
+ }
> mean(b.bias)                                                         # 0.1237288
[1] 0.1237288
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.4225352
[1] 0.4225352
> 
> dat$b.bias.plot <- 100*(b.bias)
> 
> # make category of p-value
> dat$t <- abs( dat$estimate / dat$se )
> dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))
> dat$p.cat <- cut_interval(dat$p, length=0.01)
> dat$bias.cat <- cut_interval(dat$b.bias.plot, length=5)
> 
> # some helpful code was taken from
> # http://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2
> # and http://stackoverflow.com/questions/14622421/how-to-change-legend-title-in-ggplot-density
> # and http://stackoverflow.com/questions/5981601/recommendations-for-black-and-white-color-scheme-with-ggplot2
> p <- ggplot(dat, aes(x = bias.cat)) +  geom_bar(aes(y = (..count..)/sum(..count..), fill=p.cat)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), plot.title = element_text(size=12)) + labs(x = "Expected Bias (as % of published estimate)", y = "Frequency") + guides(fill = guide_legend("Result p-value")) +  scale_fill_grey(start = 0.2, end = 0.7) + annotate("text", label = "mean bias = 12.4%", y = 0.275, x=3.5, size = 3) + ggtitle("baseline 100% chance of non-zero relationship")
> 
> p
> ggsave(filename="bias-hist-norm-nospike.pdf", plot=p, device="pdf", width=6, height=5)
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), bound = 10*abs(dat$estimate[i]) , p=0.9)
+   
+ }
> mean(b.bias)                                                         # 0.2415869
[1] 0.2415869
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.5070423
[1] 0.5070423
> 
> 
> # what about a lower threshold for significance?
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, alpha=0.01)
+   
+ }
> mean(b.bias)                                                         # 0.2959394
[1] 0.2959394
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.471831
[1] 0.471831
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0, alpha=0.01)
+   
+ }
> mean(b.bias)                                                         # 0.1491986
[1] 0.1491986
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.4366197
[1] 0.4366197
> 
> 
> 
> 
> 
> # calculate Pr(null | significant) using Bayes' Rule:
> #
> #
> #                                    Pr(significant | null) * Pr(null) 
> # Pr(null | significant) = ----------------------------------------------------------------
> #                           Pr (sig | null) * Pr(null) + Pr(sig | not null) * Pr(not null)
> # 
> # set Pr(sig | not null) = 1 to establish a lower bound
> # set Pr(not null) = 1 - Pr(null) by identity
> 
> calc.null.sig <- function(pr.sig.given.null, pr.null){
+   
+   (pr.sig.given.null * pr.null) / ( ( pr.sig.given.null * pr.null ) + (1 - pr.null) )
+   
+ }
> 
> pr.null.plot <- seq(from=0.01, to=0.99, by=0.01)
> null.sig.plot <- calc.null.sig(pr.sig.given.null=0.05, pr.null.plot)
> 
> pr.sig.null.plot <- seq(from=0.001, to=0.05, by=0.001)
> null.sig.plot <- calc.null.sig(pr.sig.null.plot, pr.null=0.95)
> null.sig.plot.2 <- calc.null.sig(pr.sig.null.plot, pr.null=0.9)
> null.sig.plot.3 <- calc.null.sig(pr.sig.null.plot, pr.null=0.75)
> null.sig.plot.4 <- calc.null.sig(pr.sig.null.plot, pr.null=0.5)
> 
> dat$t <- abs( dat$estimate / dat$se )
> dat$p <- 2*(1 - pt(dat$t, (dat$sample_size-dat$num_ivs-1)))
> 
> plotdat <- data.frame(sig.null = rep(pr.sig.null.plot,4), null.sig = c(null.sig.plot, null.sig.plot.2, null.sig.plot.3, null.sig.plot.4), pr.null = as.factor(rep(c(0.95, 0.9, 0.75, 0.5), each=length(pr.sig.null.plot))) )
> 
> # used some helpful code from
> # http://stackoverflow.com/questions/16712800/overlay-lines-and-hist-with-ggplot2
> 
> p <- ggplot(plotdat) + geom_line(aes(x=sig.null, y=null.sig, linetype=pr.null)) + theme_bw() + labs(x = "Pr(statistically significant | null)", y = "Lines: Pr(null | statistically significant) \n Histogram: Proportion of Published p-values") + guides(linetype = guide_legend("Prior Pr(null)")) + scale_y_continuous(breaks=seq(from=0, to=0.6, by=0.05))  + scale_x_continuous(breaks=seq(from=0, to=0.05, by=0.01)) + geom_rug(col=rgb(.5,0,0,alpha=.2)) + geom_histogram(data=dat, aes(x=p, y = ..count../sum(..count..)), alpha=0.2, binwidth=0.005)
> p
> ggsave(filename="false-positives.pdf", plot=p, device="pdf", width=6, height=5)
> 
> 
> # calculate mean lower bound Pr(null)
> null.prob <- calc.null.sig(pr.sig.given.null=dat$p, 0.75)
> length(which(null.prob >= 0.10)) / length(null.prob)        # 0.1056338
[1] 0.1056338
> null.prob <- calc.null.sig(pr.sig.given.null=dat$p, 0.9)
> length(which(null.prob >= 0.10)) / length(null.prob)        # 0.2535211
[1] 0.2535211
> null.prob <- calc.null.sig(pr.sig.given.null=dat$p, 0.95)
> length(which(null.prob >= 0.10)) / length(null.prob)        # 0.4084507
[1] 0.4084507
> 
> 
> 
> 
> 
> 
> 
> 
> 
> 
> ####################################
> # Appendix calculations 
> ####################################
> 
> # a sub-function to generate a large set of draws from the prior
> # for beta, simulate sampling variation using the estimated se for
> # beta, and then indicate true and false positives
> 
> # for spike-and-slab distributed beta
> 
> gen.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed){
+   
+   if(seed!="off"){
+     set.seed(seed)
+   }
+   b.true <- runif(n.draws, min=(-1)*bound, max=bound)
+   b.false <- rep(0, n.draws)
+   choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
+   b.test <- ifelse(choice==1, b.true, b.false)
+   
+   b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
+   
+   sig <- ifelse( abs(b.test.est/se.b) >= abs(qt(0.025, df=df.b)), 1, 0 )
+   
+   list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
+   
+   return(list.out)
+   
+ }
> 
> 
> 
> bias.calc <- function(b, se.b, df.b, pr.false, bound = 3 * abs(b), n.draws = 100000, seed=123456 ){
+   
+   list.store <- gen.samp(se.b, df.b, pr.false, n.draws, bound, seed)
+   sig <- which(list.store$b.sig==1)
+   bias <- list.store$b.est[sig] - list.store$b.true[sig]
+   bias.mod <- loess(bias ~ list.store$b.est[sig], control=loess.control(trace.hat = "approximate"), span=0.2)
+   out <-predict(bias.mod, newdata = b)
+   return(out)
+   
+ }
> 
> 
> some.draws <- gen.samp(se.b = 0.4, df = 100, pr.false = 0, n.draws = 30000, bound = 3, seed = 123456)
> sig <- which(some.draws$b.sig==1)
> plot.data = data.frame(b.est = some.draws$b.est[sig], b.true = some.draws$b.true[sig])
> b.est.given.b.true <- predict( loess(b.est ~ b.true, data=plot.data, span=0.2), data.frame(b.true = seq(-3,3,0.01)) )
> plot.data.1 <- data.frame(b.est = b.est.given.b.true, b.true = seq(-3,3,0.01), type = rep(1, length(seq(-3,3,0.01))))
> b.true.given.b.est <- predict( loess(b.true ~ b.est, data=plot.data, span=0.2), data.frame(b.est = seq(-3,3,0.01)) )
> plot.data.2 <- data.frame(b.est = seq(-3,3,0.01), b.true = b.true.given.b.est, type = rep(2, length(seq(-3,3,0.01))))
> plot.data.3 <- rbind(plot.data.1, plot.data.2)
> plot.data.3$type <- factor(plot.data.3$type, labels=c("E[beta estimate | true beta]", "E[true beta | beta estimate]"))
> 
> p <- ggplot(plot.data) + geom_point(aes(x=b.true, y=b.est), col="darkgray") + geom_line(data = plot.data.3, aes(x=b.true, y=b.est, linetype=type)) + scale_x_continuous(limits=c(-2,2)) + scale_y_continuous(limits=c(-2,2)) + theme(legend.position = "top") + scale_linetype("") + labs(x = "true beta", y = "beta estimate")
> suppressWarnings(print(p))
> suppressWarnings(ggsave(filename="different-biases.pdf", plot=p, device="pdf", width=6, height=6))
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
+   
+ }
> mean(b.bias)                  
[1] 56.01456
> mean(b.bias/dat$estimate)                                     # 0.3798161
[1] 0.3798161
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)      # 0.6478873
[1] 0.6478873
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
+   
+ }
> mean(b.bias)
[1] 45.89629
> mean(b.bias/dat$estimate)                                      # 0.2774034
[1] 0.2774034
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)       # 0.584507
[1] 0.584507
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
+   
+ }
> mean(b.bias)
[1] 25.20506
> mean(b.bias/dat$estimate)                                       # 0.1221786
[1] 0.1221786
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0.4507042
[1] 0.4507042
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
+   
+ }
> mean(b.bias)
[1] 0.6806784
> mean(b.bias/dat$estimate)                                       # -0.0006579831
[1] -0.0006579831
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0
[1] 0
> 
> 
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9, bound = 10*abs(dat$estimate[i]))
+   
+ }
> mean(b.bias)
[1] 61.9434
> mean(b.bias/dat$estimate)                                       # 0.5318233
[1] 0.5318233
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)        # 0.7605634
[1] 0.7605634
> 
> 
> 
> 
> 
> 
> 
> 
> # now repeat the analysis above for a normal prior
> 
> gen.norm.samp <- function(se.b, df.b, pr.false, n.draws, bound, seed){
+   
+   if(seed!="off"){
+     set.seed(seed)
+   }
+   b.true <- rnorm(n.draws, mean=0, sd=bound)
+   b.false <- rep(0, n.draws)
+   choice <- ifelse(runif(n.draws)<(1-pr.false), 1, 0)
+   b.test <- ifelse(choice==1, b.true, b.false)
+   
+   b.test.est <- b.test + se.b*rt(n.draws, df=df.b)
+   
+   sig <- ifelse( abs(b.test.est/se.b) >= abs(qt(0.025, df=df.b)), 1, 0 )
+   
+   list.out <- list(b.true = b.test, b.est = b.test.est, b.sig = sig)
+   
+   return(list.out)
+   
+ }
> 
> 
> 
> bias.calc.norm <- function(b, se.b, df.b, pr.false, bound = abs(b), n.draws = 100000, seed=123456 ){
+   
+   list.store <- gen.norm.samp(se.b, df.b, pr.false, n.draws, bound, seed)
+   sig <- which(list.store$b.sig==1)
+   bias <- list.store$b.est[sig] - list.store$b.true[sig]
+   bias.mod <- loess(bias ~ list.store$b.est[sig], control=loess.control(trace.hat = "approximate"), span=0.2)
+   out <-predict(bias.mod, newdata = b)
+   return(out)
+   
+ }
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.9)
+   
+ }
> mean(b.bias)
[1] 53.29323
> mean(b.bias/dat$estimate)                                            # 0.3798531
[1] 0.3798531
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.6690141
[1] 0.6690141
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.8)
+   
+ }
> mean(b.bias)
[1] 45.13613
> mean(b.bias/dat$estimate)                                            # 0.2895558
[1] 0.2895558
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.6126761
[1] 0.6126761
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0.5)
+   
+ }
> mean(b.bias)
[1] 27.98599
> mean(b.bias/dat$estimate)                                            # 0.1711088
[1] 0.1711088
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.556338
[1] 0.556338
> 
> 
> 
> 
> b.bias<-c()
> for(i in 1:dim(dat)[1]){
+   
+   b.bias[i]<-bias.calc.norm(dat$estimate[i], dat$se[i], (dat$sample_size[i]-dat$num_ivs[i]-1), p=0)
+   
+ }
> mean(b.bias)
[1] 12.85284
> mean(b.bias/dat$estimate)                                            # 0.09461296
[1] 0.09461296
> length(which((b.bias/dat$estimate)>=0.1))/length(b.bias)             # 0.471831
[1] 0.471831
> 
> 
> detach(dat)
> 
> 
> proc.time()
   user  system elapsed 
1606.06  107.60  616.93 
